"""
Glue for returning descriptive statistics.
"""

import os

import numpy as np
from scipy import stats

from statsmodels.stats.descriptivestats import sign_test

#
#
# ============================================
#       Univariate Descriptive Statistics
# ============================================
#


def descstats(data, cols=None, axis=0):
    """
    Prints descriptive statistics for one or multiple variables.

    Parameters
    ----------
    data: numpy array
        `x` is the data

    v: list, optional
        A list of the column number of variables.
        Default is all columns.

    axis: 1 or 0
        axis order of data.  Default is 0 for column-ordered data.

    Examples
    --------
    >>> descstats(data.exog,v=['x_1','x_2','x_3'])
    """

    x = np.array(data)  # or rather, the data we're interested in
    if cols is None:
        x = x[:, None]
    if cols is None and x.ndim == 1:
        x = x[:, None]

    if x.shape[1] == 1:
        desc = """
            ---------------------------------------------
            Univariate Descriptive Statistics
            ---------------------------------------------

            Var. Name   %(name)12s
            ----------
            Obs.          %(nobs)22i  Range                  %(range)22s
            Sum of Wts.   %(sum)22s  Coeff. of Variation     %(coeffvar)22.4g
            Mode          %(mode)22.4g  Skewness                %(skewness)22.4g
            Repeats       %(nmode)22i  Kurtosis                %(kurtosis)22.4g
            Mean          %(mean)22.4g  Uncorrected SS          %(uss)22.4g
            Median        %(median)22.4g  Corrected SS            %(ss)22.4g
            Variance      %(variance)22.4g  Sum Observations        %(sobs)22.4g
            Std. Dev.     %(stddev)22.4g
            """ % {
            "name": cols,
            "sum": "N/A",
            "nobs": len(x),
            "mode": stats.mode(x)[0][0],
            "nmode": stats.mode(x)[1][0],
            "mean": x.mean(),
            "median": np.median(x),
            "range": "(" + str(x.min()) + ", " + str(x.max()) + ")",
            "variance": x.var(),
            "stddev": x.std(),
            "coeffvar": stats.variation(x),
            "skewness": stats.skew(x),
            "kurtosis": stats.kurtosis(x),
            "uss": np.sum(x**2, axis=0),
            "ss": np.sum((x - x.mean()) ** 2, axis=0),
            "sobs": np.sum(x),
        }
        desc += """

        Percentiles
        -------------
        1  %%          %12.4g
        5  %%          %12.4g
        10 %%          %12.4g
        25 %%          %12.4g

        50 %%          %12.4g

        75 %%          %12.4g
        90 %%          %12.4g
        95 %%          %12.4g
        99 %%          %12.4g
        """ % tuple(
            [
                stats.scoreatpercentile(x, per)
                for per in (1, 5, 10, 25, 50, 75, 90, 95, 99)
            ]
        )
        t, p_t = stats.ttest_1samp(x, 0)
        M, p_M = sign_test(x)
        S, p_S = stats.wilcoxon(np.squeeze(x))

        desc += """

        Tests of Location (H0: Mu0=0)
        -----------------------------
        Test                Statistic       Two-tailed probability
        -----------------+-----------------------------------------
        Student's t      |  t {:7.5f}   Pr > |t|   <{:.4f}
        Sign             |  M {:8.2f}   Pr >= |M|  <{:.4f}
        Signed Rank      |  S {:8.2f}   Pr >= |S|  <{:.4f}

        """.format(
            t, p_t, M, p_M, S, p_S
        )
        # Should this be part of a 'descstats'
        # in any event these should be split up, so that they can be called
        # individually and only returned together if someone calls summary
        # or something of the sort

    elif x.shape[1] > 1:
        desc = (
            """
    Var. Name   |     Obs.        Mean    Std. Dev.           Range
    ------------+--------------------------------------------------------"""
            + os.linesep
        )

        for var in range(x.shape[1]):
            xv = x[:, var]
            kwargs = {
                "name": var,
                "obs": len(xv),
                "mean": xv.mean(),
                "stddev": xv.std(),
                "range": "(" + str(xv.min()) + ", " + str(xv.max()) + ")" + os.linesep,
            }
            desc += (
                "%(name)15s %(obs)9i %(mean)12.4g %(stddev)12.4g "
                "%(range)20s" % kwargs
            )
    else:
        raise ValueError("data not understood")

    return desc


# if __name__=='__main__':
# test descstats
#    import os
#    loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
#    relpath=(load_dataset(loc))
#    dta=np.genfromtxt(relpath, delimiter=",")
#    descstats(dta,['stpop'])
#    raw_input('Hit enter for multivariate test')
#    descstats(dta,['stpop','avginc','vio'])

# with plain arrays
#    import string2dummy as s2d
#    dts=s2d.string2dummy(dta)
#    ndts=np.vstack(dts[col] for col in dts.dtype.names)
# observations in columns and data in rows
# is easier for the call to stats

# what to make of
# ndts=np.column_stack(dts[col] for col in dts.dtype.names)
# ntda=ntds.swapaxis(1,0)
# ntda is ntds returns false?

# or now we just have detailed information about the different strings
# would this approach ever be inappropriate for a string typed variable
# other than dates?
#    descstats(ndts, [1])
#    raw_input("Enter to try second part")
#    descstats(ndts, [1,20,3])

if __name__ == "__main__":
    import statsmodels.api as sm

    data = sm.datasets.longley.load()
    data.exog = sm.add_constant(data.exog, prepend=False)
    sum1 = descstats(data.exog)
    sum1a = descstats(data.exog[:, :1])

    #    loc='http://eagle1.american.edu/~js2796a/data/handguns_data.csv'
    #    dta=np.genfromtxt(loc, delimiter=",")
    #    summary2 = descstats(dta,['stpop'])
    #    summary3 =  descstats(dta,['stpop','avginc','vio'])
    # TODO: needs a by argument
    #    summary4 = descstats(dta) this fails
    # this is a bug
    # p = dta[['stpop']]
    # p.view(dtype = np.float, type = np.ndarray)
    # this works
    # p.view(dtype = np.int, type = np.ndarray)

    # This is *really* slow ###
    if os.path.isfile("./Econ724_PS_I_Data.csv"):
        data2 = np.genfromtxt("./Econ724_PS_I_Data.csv", delimiter=",")
        sum2 = descstats(data2.ahe)
        sum3 = descstats(np.column_stack((data2.ahe, data2.yrseduc)))
        sum4 = descstats(np.column_stack([data2[_] for _ in data2.dtype.names]))
